; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=dse -S | FileCheck %s

; A 28-byte memset (align 4) starting at %p+4 is followed by a 4-byte store to
; the same address, i.e. bytes 4..7 relative to %p. The expected output has the
; memset start advanced by 4 bytes and its length reduced from 28 to 24.
define void @write4to7(ptr nocapture %p) {
; CHECK-LABEL: @write4to7(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i1 false)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
  store i32 1, ptr %arrayidx1, align 4
  ret void
}

; Same input and expected output as @write4to7: the 28-byte memset at %p+4 is
; advanced by 4 bytes and shortened to 24 because a 4-byte store overwrites its
; first 4 bytes.
; NOTE(review): the body is identical to @write4to7 in this file; the name
; suggests the two once differed in the pointer element type. Confirm whether
; this test still adds coverage.
define void @write4to7_weird_element_type(ptr nocapture %p) {
; CHECK-LABEL: @write4to7_weird_element_type(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i1 false)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
  store i32 1, ptr %arrayidx1, align 4
  ret void
}

; Variant of @write4to7 where every pointer lives in addrspace(1). The expected
; output keeps addrspace(1) on the newly created i8 getelementptr and on the
; shortened (28 -> 24 bytes) @llvm.memset.p1.i64 call.
define void @write4to7_addrspace(ptr addrspace(1) nocapture %p) {
; CHECK-LABEL: @write4to7_addrspace(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 1
; CHECK-NEXT:    store i32 1, ptr addrspace(1) [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr addrspace(1) %p, i64 1
  call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %arrayidx0, i8 0, i64 28, i1 false)
  %arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %p, i64 1
  store i32 1, ptr addrspace(1) %arrayidx1, align 4
  ret void
}

; Atomic variant of @write4to7: the earlier write is an element-wise unordered
; atomic memset with element size 4, and the later write is an unordered atomic
; i32 store to the memset's start address. The expected output advances the
; memset by 4 bytes (one element) and shortens it from 28 to 24 bytes.
define void @write4to7_atomic(ptr nocapture %p) {
; CHECK-LABEL: @write4to7_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
; CHECK-NEXT:    store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i32 4)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
  store atomic i32 1, ptr %arrayidx1 unordered, align 4
  ret void
}

; A 28-byte memset (align 4) directly on %p is followed by a 4-byte store to
; %p (bytes 0..3). The expected output starts the memset at %p+4 and reduces
; its length from 28 to 24.
define void @write0to3(ptr nocapture %p) {
; CHECK-LABEL: @write0to3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    store i32 1, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.p0.i64(ptr align 4 %p, i8 0, i64 28, i1 false)
  store i32 1, ptr %p, align 4
  ret void
}

; Atomic variant of @write0to3: element-wise unordered atomic memset (element
; size 4) followed by an unordered atomic i32 store to %p. The expected output
; starts the memset at %p+4 with length 24 instead of 28.
define void @write0to3_atomic(ptr nocapture %p) {
; CHECK-LABEL: @write0to3_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    store atomic i32 1, ptr [[P]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %p, i8 0, i64 28, i32 4)
  store atomic i32 1, ptr %p unordered, align 4
  ret void
}

; The atomicity of the store is weaker than that of the memset: the memset is
; element-wise unordered atomic, while the overwriting i32 store is a plain
; (non-atomic) store. The expected output still trims the first 4 bytes of the
; memset (start %p+4, length 28 -> 24).
define void @write0to3_atomic_weaker(ptr nocapture %p) {
; CHECK-LABEL: @write0to3_atomic_weaker(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    store i32 1, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %p, i8 0, i64 28, i32 4)
  store i32 1, ptr %p, align 4
  ret void
}

; A 32-byte memset (align 4) on %p is followed by an 8-byte store to %p
; (bytes 0..7). The expected output starts the memset at %p+8 and reduces its
; length from 32 to 24.
define void @write0to7(ptr nocapture %p) {
; CHECK-LABEL: @write0to7(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    store i64 1, ptr [[P]], align 8
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.p0.i64(ptr align 4 %p, i8 0, i64 32, i1 false)
  store i64 1, ptr %p, align 8
  ret void
}

; Changing the memset start and length is okay here because the size of the
; store (8 bytes) is a multiple of the memset element size (4 bytes). The
; expected output starts the atomic memset at %p+8 with length 24 instead of 32.
define void @write0to7_atomic(ptr nocapture %p) {
; CHECK-LABEL: @write0to7_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    store atomic i64 1, ptr [[P]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %p, i8 0, i64 32, i32 4)
  store atomic i64 1, ptr %p unordered, align 8
  ret void
}

; The memset covers 28 bytes starting at %p+4, while the later i64 store writes
; bytes 0..7 of %p, so only the store's upper 4 bytes overlap the beginning of
; the memset. The expected output advances the memset start by 4 bytes and
; reduces its length from 28 to 24.
define void @write0to7_2(ptr nocapture %p) {
; CHECK-LABEL: @write0to7_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    store i64 1, ptr [[P]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i1 false)
  store i64 1, ptr %p, align 8
  ret void
}

; Atomic variant of @write0to7_2: an element-wise unordered atomic memset
; (element size 4) of 28 bytes at %p+4, followed by an unordered atomic i64
; store to %p that overlaps the memset's first 4 bytes. The expected output
; advances the memset by 4 bytes and reduces its length to 24.
define void @write0to7_2_atomic(ptr nocapture %p) {
; CHECK-LABEL: @write0to7_2_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    store atomic i64 1, ptr [[P]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i32 4)
  store atomic i64 1, ptr %p unordered, align 8
  ret void
}

; We do not trim the beginning of the earlier write if the alignment of the
; start pointer would change. Here the memset is align 8 and the later store
; overwrites only its first 4 bytes, so moving the start to %p+4 would not
; preserve the 8-byte alignment. The expected output leaves the memset as is.
define void @dontwrite0to3_align8(ptr nocapture %p) {
; CHECK-LABEL: @dontwrite0to3_align8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    store i32 1, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.p0.i64(ptr align 8 %p, i8 0, i64 32, i1 false)
  store i32 1, ptr %p, align 4
  ret void
}

; Atomic variant of @dontwrite0to3_align8: the element-wise unordered atomic
; memset is align 8 and only its first 4 bytes are overwritten by the later
; unordered atomic i32 store. The expected output leaves the memset unchanged
; (same pointer, length 32).
define void @dontwrite0to3_align8_atomic(ptr nocapture %p) {
; CHECK-LABEL: @dontwrite0to3_align8_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 32, i32 4)
; CHECK-NEXT:    store atomic i32 1, ptr [[P]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %p, i8 0, i64 32, i32 4)
  store atomic i32 1, ptr %p unordered, align 4
  ret void
}

; A 32-byte memset (align 4) on %p is followed by a 2-byte store to %p
; (bytes 0..1). The expected output leaves the memset unchanged.
; NOTE(review): presumably the 2 overwritten bytes are not trimmed because
; doing so would break the memset's 4-byte alignment -- confirm against DSE.
define void @dontwrite0to1(ptr nocapture %p) {
; CHECK-LABEL: @dontwrite0to1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    store i16 1, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.p0.i64(ptr align 4 %p, i8 0, i64 32, i1 false)
  store i16 1, ptr %p, align 4
  ret void
}

; Atomic variant of @dontwrite0to1: the unordered atomic i16 store overwrites
; only 2 bytes, which is less than the memset's element size (4) and alignment
; (4). The expected output leaves the element-wise atomic memset unchanged.
define void @dontwrite0to1_atomic(ptr nocapture %p) {
; CHECK-LABEL: @dontwrite0to1_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i32 4)
; CHECK-NEXT:    store atomic i16 1, ptr [[P]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %p, i8 0, i64 32, i32 4)
  store atomic i16 1, ptr %p unordered, align 4
  ret void
}

; The memset covers 32 bytes starting at %p+4 (bytes 4..35 of %p); the later
; i64 store starts at %p+2 and covers bytes 2..9. Six bytes of the memset
; (4..9) are overwritten, but the expected output trims only 4 of them
; (start advanced by 4, length 32 -> 28), which keeps the memset at align 4.
define void @write2to10(ptr nocapture %p) {
; CHECK-LABEL: @write2to10(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
; CHECK-NEXT:    store i64 1, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 32, i1 false)
  %arrayidx2 = getelementptr inbounds i16, ptr %p, i64 1
  store i64 1, ptr %arrayidx2, align 8
  ret void
}

; Atomic variant of @write2to10: an element-wise unordered atomic memset
; (element size 4) of 32 bytes at %p+4, followed by an unordered atomic i64
; store at %p+2 that overwrites the memset's first 6 bytes. The expected output
; trims exactly one 4-byte element (start advanced by 4, length 32 -> 28).
define void @write2to10_atomic(ptr nocapture %p) {
; CHECK-LABEL: @write2to10_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i32 4)
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
; CHECK-NEXT:    store atomic i64 1, ptr [[ARRAYIDX2]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 32, i32 4)
  %arrayidx2 = getelementptr inbounds i16, ptr %p, i64 1
  store atomic i64 1, ptr %arrayidx2 unordered, align 8
  ret void
}

; A 32-byte memset (align 8) on %P is followed by two i64 stores: first to
; bytes 8..15, then to bytes 0..7. Together they overwrite the first 16 bytes,
; and the expected output starts the memset at %P+16 with length 16.
; The stores carry no explicit alignment in the input; the expected output
; prints them with align 4.
define void @write8To15AndThen0To7(ptr nocapture %P) {
; CHECK-LABEL: @write8To15AndThen0To7(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
; CHECK-NEXT:    store i64 1, ptr [[BASE64_1]], align 4
; CHECK-NEXT:    store i64 2, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.p0.i64(ptr align 8 %P, i8 0, i64 32, i1 false)

  %base64_1 = getelementptr inbounds i64, ptr %P, i64 1

  store i64 1, ptr %base64_1
  store i64 2, ptr %P
  ret void
}

; Atomic variant of @write8To15AndThen0To7: an element-wise unordered atomic
; memset (element size 8, align 8) of 32 bytes on %P, followed by unordered
; atomic i64 stores to bytes 8..15 and then 0..7. The expected output starts
; the memset at %P+16 with length 16.
define void @write8To15AndThen0To7_atomic(ptr nocapture %P) {
; CHECK-LABEL: @write8To15AndThen0To7_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i32 8)
; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
; CHECK-NEXT:    store atomic i64 1, ptr [[BASE64_1]] unordered, align 8
; CHECK-NEXT:    store atomic i64 2, ptr [[P]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %P, i8 0, i64 32, i32 8)

  %base64_1 = getelementptr inbounds i64, ptr %P, i64 1

  store atomic i64 1, ptr %base64_1 unordered, align 8
  store atomic i64 2, ptr %P unordered, align 8
  ret void
}

; Like @write8To15AndThen0To7_atomic, except the second overwriting store
; (bytes 0..7) is a plain non-atomic store while the first (bytes 8..15) is
; unordered atomic. The expected output still starts the atomic memset at
; %P+16 with length 16.
define void @write8To15AndThen0To7_atomic_weaker(ptr nocapture %P) {
; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i32 8)
; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
; CHECK-NEXT:    store atomic i64 1, ptr [[BASE64_1]] unordered, align 8
; CHECK-NEXT:    store i64 2, ptr [[P]], align 8
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %P, i8 0, i64 32, i32 8)

  %base64_1 = getelementptr inbounds i64, ptr %P, i64 1

  store atomic i64 1, ptr %base64_1 unordered, align 8
  store i64 2, ptr %P, align 8
  ret void
}

; Like @write8To15AndThen0To7_atomic, except the first overwriting store
; (bytes 8..15) is a plain non-atomic store while the second (bytes 0..7) is
; unordered atomic. The expected output still starts the atomic memset at
; %P+16 with length 16.
define void @write8To15AndThen0To7_atomic_weaker_2(ptr nocapture %P) {
; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i32 8)
; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
; CHECK-NEXT:    store i64 1, ptr [[BASE64_1]], align 8
; CHECK-NEXT:    store atomic i64 2, ptr [[P]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %P, i8 0, i64 32, i32 8)

  %base64_1 = getelementptr inbounds i64, ptr %P, i64 1

  store i64 1, ptr %base64_1, align 8
  store atomic i64 2, ptr %P unordered, align 8
  ret void
}

; Declarations of the memset intrinsics called by the tests in this file:
; the plain memset for address spaces 0 and 1, and the element-wise unordered
; atomic memset for address space 0.
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.element.unordered.atomic.p0.i64(ptr nocapture, i8, i64, i32) nounwind

; The memset covers 32 bytes starting at %p+1 with align 1; the later i64 store
; to %p (align 1) covers bytes 0..7 and therefore overwrites the memset's first
; 7 bytes. With align 1 all 7 bytes can be trimmed: the expected output
; advances the memset start by 7 and reduces its length from 32 to 25.
define void @ow_begin_align1(ptr nocapture %p) {
; CHECK-LABEL: @ow_begin_align1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 7
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[TMP0]], i8 0, i64 25, i1 false)
; CHECK-NEXT:    store i64 1, ptr [[P]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 1 %p1, i8 0, i64 32, i1 false)
  store i64 1, ptr %p, align 1
  ret void
}

; Same layout as @ow_begin_align1, but the memset is align 4. The store
; overwrites the memset's first 7 bytes, yet the expected output trims only 4
; of them (start advanced by 4, length 32 -> 28), keeping the align 4 on the
; memset pointer.
; NOTE(review): the name says "end", but the expected output trims the
; beginning of the memset -- confirm whether the name is a leftover.
define void @ow_end_align4(ptr nocapture %p) {
; CHECK-LABEL: @ow_end_align4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
; CHECK-NEXT:    store i64 1, ptr [[P]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %p1, i8 0, i64 32, i1 false)
  store i64 1, ptr %p, align 1
  ret void
}

; Same layout as @ow_begin_align1, but the memset is align 8. The store
; overwrites only the memset's first 7 bytes, which is less than one 8-byte
; alignment unit, and the expected output leaves the memset unchanged.
; NOTE(review): the name says "end", but the overlap is at the beginning of
; the memset -- confirm whether the name is a leftover.
define void @ow_end_align8(ptr nocapture %p) {
; CHECK-LABEL: @ow_end_align8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[P1]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    store i64 1, ptr [[P]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 8 %p1, i8 0, i64 32, i1 false)
  store i64 1, ptr %p, align 1
  ret void
}
